/*
This do-file creates selected figures for the online appendix to:
Military Service and Immigrants' Integration: Evidence from the Vietnam Draft Lotteries

Note that all the underlying data for these figures are publicly available.

For results produced from the restricted-use census files, please consult the file "redacted_analysis.do"
*/


/* 
Figures A2.1 and A2.2 are created using the IPUMS public-use samples 1900 - 2000, 
plus the 2010 ACS and 2019 ACS

First some data cleanup:
*/

* Load data here.

// rounding the weight variable so it can be used as an integer frequency weight
gen wt = round(perwt)

// dropping the 1970 form2 sample
drop if sample == 197002

// people born outside the US, but counting territorials.
// Missing bpl is left missing: in Stata a missing value compares as larger
// than any number, so an unguarded "bpl > 99" would flag it as immigrant.
gen immigrant = bpl > 99 if !missing(bpl)

// categorizing immigrants by birthplace code (bpl).
// The rules are applied in order and later rules deliberately override
// earlier ones (Austria/Germany are moved from SE to NW Europe; Cyprus is
// moved from Asia to SE Europe). Birthplaces not covered by any rule keep
// composition == . and are dropped before graphing.
// The variable name is written out in full so the block does not depend on
// variable-name abbreviation being enabled or unambiguous.
cap drop composition
gen composition = .
replace composition = 1 if inrange(bpl, 100, 120)      // US Territorials
replace composition = 2 if bpl >= 150 & bpl < 200      // canada + islands
replace composition = 3 if bpl == 200                  // mexico
replace composition = 4 if bpl > 200 & bpl <= 300      // rest of latin america
replace composition = 5 if inrange(bpl, 400, 429)      // NW Europe
replace composition = 6 if bpl >= 430 & bpl < 500      // SE Europe
replace composition = 5 if inlist(bpl, 450, 453)       // Austria and Germany
replace composition = 7 if bpl >= 500 & bpl < 600      // Asia
replace composition = 6 if bpl == 531                  // cyprus
replace composition = 8 if bpl == 600                  // Africa
replace composition = 5 if bpl == 700                  // Australia + New Zealand

label define complab 1 "US Territories" 2 "Canada" 3 "Mexico" 4 "Rest of Latin America" ///
	5 "NW Europe + Australia + New Zealand" 6 "SE Europe" 7 "Asia" 8 "Africa", modify
label values composition complab

save "Samples_1900_2019_clean.dta", replace



* Figure A2.1: Origins of the Foreign-Born over Time
* --------------------------------------------------
* Weighted head counts of each origin group in every sample, drawn as one
* connected line per group against the sample year.

use "Samples_1900_2019_clean.dta", clear

// only birthplaces that were assigned to an origin group
keep if !missing(composition)

// one row per (origin group, sample); _freq holds the weighted count
contract composition sample [fweight = wt]

// the sample code divided by 100 and rounded is used as the year on the x axis
gen year = round(sample / 100)

// one connected-scatter layer per origin group, each with its own marker;
// the layer order (1..8) matches the keys used in legend(order())
local markers D O T S X + arrowf pipe
local layers
forvalues grp = 1/8 {
	local mk : word `grp' of `markers'
	local layers `layers' (scatter _freq year if composition == `grp', connect(l) msym(`mk'))
}

// y axis is labelled in millions while the data stay in persons
twoway `layers', ///
	xlab(1900(10)2020) xtitle("") ///
	ylab(0 "0" 4000000 "4" 8000000 "8" 12000000 "12" 16000000 "16") ///
	ytitle("Foreign Born (in Millions)") ///
	legend(order(1 "US Territories" 2 "Canada" 3 "Mexico" 4 "Latin Am." 5 "NW Europe" ///
		6 "S+E Europe" 7 "Asia" 8 "Africa"))
graph export "${output}compositions_fb_overtime.png", as(png) replace


* Figure A2.2: Origins of Draft-Eligible versus Older Immigrants in 1970
* ----------------------------------------------------------------------
* Weighted percentage distribution of origin groups, shown separately for
* immigrant men aged 18-21 and immigrant men aged 22+ in the 1970 sample.

use "Samples_1900_2019_clean.dta", clear

// sample restrictions, applied in one pass
keep if sample == 197001 /// 1970 sample only (version with citizenship variable)
	& bpl > 99           /// not native-born american citizens
	& sex != 2           /// men only
	& citizen != 1       //  not born abroad to american parents

// age groups: 1 = aged 18-21 (the birthyears corresponding to our main sample),
// 0 = everyone else, except ages 0-17 which are set to missing and therefore
// excluded from the by() panels
gen tagg = inrange(age, 18, 21) if !(age >= 0 & age < 18)

label define tagglab 0 "Immigrant Men Aged 22+" 1 "Immigrant Men Aged 18-21", modify
label values tagg tagglab

// redefining composition: unclassified birthplaces are removed, and
// rest of latin america (4), Asia (7) and Africa (8) are pooled into 9
keep if !missing(composition)

gen comp2 = composition
recode comp2 (4 7 8 = 9)

label define comp2lab 1 "US Territorials" 2 "Canada" 3 "Mexico" 5 "NW Europe" 6 "S+E Europe" 9 "Rest of World", modify
label values comp2 comp2lab

// graphing: bars are weighted percentages within each age-group panel
graph bar (percent) [fweight = wt], ///
	over(comp2, label(angle(45))) ///
	by(tagg, note("")) ///
	blabel(bar, format(%03.2f)) ///
	ylab(0(5)40) ytitle("Percent")
graph export "${output}1970s_profiles_draftage.png", as(png) replace





/* 
Figures A5.3 - A5.8 are created using the IPUMS public-use 2000 5% sample.

In our main paper, we calculated residential integration at the census tract and 
census block group level.

These variables are not available in the public-use IPUMS files.  

Instead, residential integration is calculated at the county level.

This is saved as an intermediate dataset called  "ResInt_county.dta"

*/

* Load data here:

// indicator: 1 when the birthplace code is at most 99 (native-born), 0 otherwise
gen native = bpl <= 99

// county-level mean of the indicator, i.e. the share of native-born records
// in each state x county cell (on a 0-1 scale).
// NOTE(review): the mean is unweighted — confirm that ignoring perwt here is intended.
collapse (mean) native, by(statefip countyfip)

// intermediate dataset merged into the 2000 analysis sample below
save "ResInt_county.dta", replace


* Merging in county-level variables to the 2000 census, plus some cleanup:


use "2000_for_counting.dta", clear

// keeping men only
// should already be taken care of at the download stage
keep if sex == 1

// group quarters: keep records with gq below 2
keep if gq < 2


// keeping the relevant birthyears
keep if inrange(birthyr, 1949, 1952)

// dropping native-born american citizens
drop if bpl <= 99

// dropping people born abroad to American parents
drop if citizen == 1

// keeping only people who immigrated before the cutoff year of their birth
// cohort (1970 for 1949-50, 1971 for 1951, 1972 for 1952).
// Only the birthyears retained above are listed. Records with a missing
// yrimmig are dropped as well, because a missing value compares as larger
// than any number in Stata.
drop if inrange(birthyr, 1949, 1950) & yrimmig >= 1970
drop if birthyr == 1951 & yrimmig >= 1971
drop if birthyr == 1952 & yrimmig >= 1972

// keeping relate codes 1 and 2 only
// NOTE(review): presumably household heads and their spouses — confirm against the codebook
keep if relate == 1 | relate == 2

// merging in residential integration.
// keep(master match) stops counties that have nobody in this sample from
// being appended as person-less rows; such rows would carry a county share
// but no individual data and would leak into the county-level box plots.
merge m:1 statefip countyfip using ResInt_county.dta, keep(master match)
rename native pnative

* now creating outcome variables:

* Native-born spouse
* 1 if the spouse's birthplace code is at most 99, 0 otherwise
* (records with a missing spouse birthplace stay at 0)
cap drop spouse_native
gen spouse_native = 0
label variable spouse_native "Native-born spouse"

replace spouse_native = 1 if bpl_sp <= 99 // excludes US outlying territories

label define spouselab1 0 "Non-Native Spouse" 1 "Native-born Spouse", modify 
label values spouse_native spouselab1


* Non-co-national spouse
* 1 if a spouse birthplace is recorded and differs from the respondent's own,
* 0 otherwise. The missing() guard matters: without it, every record with no
* spouse birthplace would satisfy bpl != bpl_sp and be flagged as having a
* non-co-national spouse.
cap drop spouse_notconatl
gen spouse_notconatl = 0
label variable spouse_notconatl "Non-co-national spouse"

replace spouse_notconatl = 1 if bpl != bpl_sp & !missing(bpl, bpl_sp)

label define spouselab2 0 "Otherwise" 1 "Non-co-national Spouse", modify 
label values spouse_notconatl spouselab2


* Naturalization
* 1 = naturalized citizen at the time of the 2000 census (citizen == 2)
* 0 = not a citizen at the time of the 2000 census (citizen == 3)
* . = any other citizen code, including citizen == 0
*     (people born in American territories)
cap drop naturalized
gen naturalized = (citizen == 2) if inlist(citizen, 2, 3)
label variable naturalized "Naturalized"

label define natlab 0 "Not Naturalized" 1 "Naturalized Citizen", modify
label values naturalized natlab




* creating a variable for people from exclusively english-speaking countries
* (used below to blank out the English-language outcomes for these origins).
* All rules use the general birthplace code bpl; comparing the upper bound
* against the detailed code bpld would make the UK + Ireland rule
* impossible to satisfy.
gen engl_exclusive = 0
label var engl_exclusive "Born in Excl. English-Speaking Area"

replace engl_exclusive = 1 if bpl == 150              // Canada
replace engl_exclusive = 1 if inrange(bpl, 410, 414)  // UK + Ireland
replace engl_exclusive = 1 if bpl == 700              // Australia + New Zealand (one shared code)



* Only English
* 1 if speakeng == 3, 0 otherwise; set to missing for people who come from
* exclusively english-speaking countries
cap drop only_engl
gen only_engl = (speakeng == 3) if engl_exclusive != 1
label variable only_engl "Only English"

label define eng1lab 0 "Not Exclusive English-Speaker" 1 "Speaks Only English", modify
label values only_engl eng1lab


* English ability
* Ordered scale from 0 (does not speak english) to 4 (speaks only english).
* The speakeng codes are listed from lowest to highest ability, so the
* position in the list (starting at 0) is the value assigned:
*   speakeng 1 -> 0, 6 -> 1, 5 -> 2, 4 -> 3, 3 -> 4; anything else stays missing
cap drop engl_ability
gen engl_ability = .

local level 0
foreach code of numlist 1 6 5 4 3 {
	replace engl_ability = `level' if speakeng == `code'
	local ++level
}

// people who come from exclusively english-speaking countries are excluded
replace engl_ability = . if engl_exclusive == 1

label variable engl_ability "English ability"

label define eng2lab 0 "Does not speak English" 1 "Yes, but not well" 2 "Yes, speaks well" 3 "Yes, speaks very well" 4 "Speaks only English", modify
label values engl_ability eng2lab

* Spouse Category
* 1 = spouse's birthplace code is at most 99 (native-born)
* 2 = spouse has the same birthplace code as the respondent
* 3 = spouse is foreign-born with a different birthplace code
* . = no spouse birthplace recorded
* Categories 2 and 3 both require non-missing birthplaces; otherwise a record
* where both codes are missing would count as "equal" and land in category 2.
cap drop spouse_cat
gen spouse_cat = .
replace spouse_cat = 1 if bpl_sp <= 99
replace spouse_cat = 2 if bpl == bpl_sp & !missing(bpl, bpl_sp)
replace spouse_cat = 3 if bpl != bpl_sp & bpl_sp > 99 & !missing(bpl, bpl_sp)

label define spouselab 1 "Native-Born Spouse" 2 "Co-National Spouse" 3 "Other Nationality Spouse", modify
label values spouse_cat spouselab


// options shared by the three weighted percentage bar charts (A5.3 - A5.5)
local pctbar blabel(bar, format(%03.2f) size(medsmall)) ytitle("Percent")

// y-axis title shared by the three county-level box plots (A5.6 - A5.8).
// NOTE(review): pnative is renamed from a county mean of a 0/1 indicator, so
// the plotted values look like shares on a 0-1 scale rather than percentages
// — confirm the axis title.
local boxtitle ytitle("Percent Native-Born in County")


* Figure A5.3: Naturalization and English Language Ability
* ---------------------------------------------------------

graph bar (percent) [fweight = perwt], ///
	over(engl_ability, label(angle(45))) ///
	by(naturalized, note("")) ///
	`pctbar' ylab(0(10)50)
graph export "${output}naturalized_englabl.png", as(png) replace


* Figure A5.4: Naturalization and Inter-marriage
* ----------------------------------------------

graph bar (percent) [fweight = perwt], ///
	over(spouse_cat, label(angle(45))) ///
	by(naturalized, note("")) ///
	`pctbar' ylab(0(10)70)
graph export "${output}naturalized_spousecat.png", as(png) replace



* Figure A5.5: English Language Ability and Intermarriage
* -------------------------------------------------------

graph bar (percent) [fweight = perwt], ///
	over(engl_ability, label(angle(45))) ///
	by(spouse_cat, note("") row(1)) ///
	`pctbar' ylab(0(10)70)
graph export "${output}engl_spousecat.png", as(png) replace


* Figure A5.6: Residential Integration and English Language Ability
* -----------------------------------------------------------------

graph box pnative, over(engl_ability, label(alt)) `boxtitle'
graph export "${output}pnative_engl.png", as(png) replace


* Figure A5.7:  Residential Integration and Inter-marriage
* --------------------------------------------------------

graph box pnative, over(spouse_cat) `boxtitle'
graph export "${output}pnative_spousecat.png", as(png) replace


* Figure A5.8: Residential Integration and Naturalization
* -------------------------------------------------------

graph box pnative, over(naturalized) `boxtitle'
graph export "${output}pnative_naturalized.png", as(png) replace



/* 
Figure A8.10: US Military Fatalities in the Vietnam War, 1965–1975

Data from https://www.archives.gov/research/military/vietnam-war/casualty-statistics

Manually download the table: 

DCAS Vietnam Conflict Extract File record counts by INCIDENT OR DEATH DATE (Year) (as of April 29, 2008 )

*/

* Load data here:

// generating cumulative sum.
// sum() accumulates in the current row order, so the data are put in
// chronological order first; otherwise the running total depends on how the
// downloaded table happened to be arranged.
sort year
gen cumulative_deaths = sum(deaths)

// bars: deaths per year (left axis); line: running total (right axis);
// dashed vertical line marks 1970
graph twoway ///
	(bar deaths year, bcolor(gs10) yaxis(1)) ///
	(line cumulative_deaths year, lcolor(black) lpattern(solid) yaxis(2)), ///
	xline(1970, lpattern(dash)) ///
	xtitle("Year") xlab(1965(2)1975) ///
	ytitle("Casualties per year", axis(1)) ylab(0(5000)20000, axis(1)) ///
	ytitle("Cumulative casualties", axis(2)) ylab(0(10000)60000, axis(2)) ///
	legend(off)
graph export "${output}deaths.pdf", replace





/*
Figures A9.11 and A9.12 are created using the 1970 Form 1 census sample and the 2000 5% census sample available via IPUMS.

We start with some preliminary cleanup:
*/ 


* Load data here:

// keeping only the 2000 and 1970 form1 samples
keep if inlist(sample, 200001, 197001)

// keeping the relevant birthyears 
keep if inrange(birthyr, 1949, 1952)

// dropping native-born american citizens
drop if bpl <= 99

// dropping people born abroad to American parents
drop if citizen == 1

// dropping people in the 2000 census who immigrated in 1970 or later
// (a missing yrimmig also satisfies "> 1969" in Stata and is dropped)
drop if yrimmig > 1969 & sample == 200001

// categorizing immigrants by birthplace code (bpl).
// The rules are applied in order and later rules deliberately override
// earlier ones (Austria/Germany are moved from SE to NW Europe; Cyprus is
// moved from Asia to SE Europe). Birthplaces not covered by any rule keep
// composition == . .
// The variable name is written out in full so the block does not depend on
// variable-name abbreviation being enabled or unambiguous.
cap drop composition
gen composition = .
replace composition = 1 if inrange(bpl, 100, 120)      // US Territorials
replace composition = 2 if bpl >= 150 & bpl < 200      // canada + islands
replace composition = 3 if bpl == 200                  // mexico
replace composition = 4 if bpl > 200 & bpl <= 300      // rest of latin america
replace composition = 5 if inrange(bpl, 400, 429)      // NW Europe
replace composition = 6 if bpl >= 430 & bpl < 500      // SE Europe
replace composition = 5 if inlist(bpl, 450, 453)       // Austria and Germany
replace composition = 7 if bpl >= 500 & bpl < 600      // Asia
replace composition = 6 if bpl == 531                  // cyprus
replace composition = 8 if bpl == 600                  // Africa
replace composition = 5 if bpl == 700                  // Australia + New Zealand

label define complab 1 "US Territories" 2 "Canada" 3 "Mexico" 4 "Rest of Latin America" ///
	5 "NW Europe + Australia + New Zealand" 6 "SE Europe" 7 "Asia" 8 "Africa", modify
label values composition complab

// coarser grouping used in the figures: rest of latin america (4), Asia (7)
// and Africa (8) are pooled into 9 "Rest of World"
gen comp2 = composition
recode comp2 (4 7 8 = 9)

label define comp2lab 1 "US Territorials" 2 "Canada" 3 "Mexico" 5 "NW Europe" 6 "S+E Europe" 9 "Rest of World", modify
label values comp2 comp2lab

// saving
save "clean_1970_2000.dta", replace





* Figure A9.11: Comparing 2000 and 1970 Census Samples
* -----------------------------------------------------
* Weighted counts of men in each census sample, one panel per origin group.

use "clean_1970_2000.dta", clear

// readable names for the two sample codes on the category axis
label define samplab 197001 "1970 Census" 200001 "2000 Census", modify
label values sample samplab

// men only
drop if sex != 1

// graphing: weighted head count per sample within each comp2 panel
graph bar (count) [fweight = perwt], ///
	over(sample) ///
	by(comp2, note("")) ///
	blabel(bar) ///
	ylab(0(20000)80000) ytitle("Count")
graph export "${output}counts_comparison_nationalities.png", as(png) replace





* Fig A9.12: Mexican-born Individuals in 2000 and 1970
* ----------------------------------------------------
* Weighted counts of Mexican-born people in each census sample, one panel per sex.

use "clean_1970_2000.dta", clear

// readable names for the two sample codes on the category axis
label define samplab 197001 "1970 Census" 200001 "2000 Census", modify
label values sample samplab

// mexican born only (bpl code 200)
drop if bpl != 200

// graphing: weighted head count per sample within each sex panel
graph bar (count) [fweight = perwt], ///
	over(sample) ///
	by(sex, note("")) ///
	blabel(bar) ///
	ylab(0(20000)60000) ytitle("Count")
graph export "${output}mex_by_sex.png", as(png) replace
